Move cpu_{sibling,core}_map into per-CPU space
authorKeir Fraser <keir.fraser@citrix.com>
Mon, 13 Jul 2009 10:45:31 +0000 (11:45 +0100)
committerKeir Fraser <keir.fraser@citrix.com>
Mon, 13 Jul 2009 10:45:31 +0000 (11:45 +0100)
These CPU maps get read from all CPUs, so besides addressing the
O(NR_CPUS^2) growth of these objects, they are also moved into the
previously introduced read-mostly sub-section of the per-CPU section,
so that a full cache line need not be wasted to align (and properly
pad) them — overhead that would be undue on systems with a low
NR_CPUS.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
xen/arch/ia64/linux-xen/setup.c
xen/arch/ia64/linux-xen/smpboot.c
xen/arch/ia64/xen/dom0_ops.c
xen/arch/x86/oprofile/op_model_p4.c
xen/arch/x86/smpboot.c
xen/arch/x86/sysctl.c
xen/common/domctl.c
xen/common/sched_credit.c
xen/include/asm-ia64/linux-xen/asm/smp.h
xen/include/asm-x86/smp.h

index 822d5fb6800c5432993cdcaaa77ec145ad3eb9a7..b767a500e37d608481386aeda63e1d3bc840e329 100644 (file)
@@ -577,8 +577,8 @@ late_setup_arch (char **cmdline_p)
 
        cpu_physical_id(0) = hard_smp_processor_id();
 
-       cpu_set(0, cpu_sibling_map[0]);
-       cpu_set(0, cpu_core_map[0]);
+       cpu_set(0, per_cpu(cpu_sibling_map, 0));
+       cpu_set(0, per_cpu(cpu_core_map, 0));
 
        check_for_logical_procs();
        if (smp_num_cpucores > 1)
index 38f761796dff76dda46e3b7a2e51df466245aa7d..f9cd243b3f635b07a1a7c1f2d8f98d9ca831d64e 100644 (file)
@@ -144,8 +144,8 @@ EXPORT_SYMBOL(cpu_online_map);
 cpumask_t cpu_possible_map;
 EXPORT_SYMBOL(cpu_possible_map);
 
-cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
-cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_t, cpu_core_map);
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_t, cpu_sibling_map);
 int smp_num_siblings = 1;
 int smp_num_cpucores = 1;
 
@@ -686,13 +686,13 @@ clear_cpu_sibling_map(int cpu)
 {
        int i;
 
-       for_each_cpu_mask(i, cpu_sibling_map[cpu])
-               cpu_clear(cpu, cpu_sibling_map[i]);
-       for_each_cpu_mask(i, cpu_core_map[cpu])
-               cpu_clear(cpu, cpu_core_map[i]);
+       for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu))
+               cpu_clear(cpu, per_cpu(cpu_sibling_map, i));
+       for_each_cpu_mask(i, per_cpu(cpu_core_map, cpu))
+               cpu_clear(cpu, per_cpu(cpu_core_map, i));
 
-       cpus_clear(cpu_sibling_map[cpu]);
-       cpus_clear(cpu_core_map[cpu]);
+       cpus_clear(per_cpu(cpu_sibling_map, cpu));
+       cpus_clear(per_cpu(cpu_core_map, cpu));
 }
 
 static void
@@ -702,12 +702,12 @@ remove_siblinginfo(int cpu)
 
        if (cpu_data(cpu)->threads_per_core == 1 &&
            cpu_data(cpu)->cores_per_socket == 1) {
-               cpu_clear(cpu, cpu_core_map[cpu]);
-               cpu_clear(cpu, cpu_sibling_map[cpu]);
+               cpu_clear(cpu, per_cpu(cpu_core_map, cpu));
+               cpu_clear(cpu, per_cpu(cpu_sibling_map, cpu));
                return;
        }
 
-       last = (cpus_weight(cpu_core_map[cpu]) == 1 ? 1 : 0);
+       last = (cpus_weight(per_cpu(cpu_core_map, cpu)) == 1);
 
        /* remove it from all sibling map's */
        clear_cpu_sibling_map(cpu);
@@ -800,11 +800,11 @@ set_cpu_sibling_map(int cpu)
 
        for_each_online_cpu(i) {
                if ((cpu_data(cpu)->socket_id == cpu_data(i)->socket_id)) {
-                       cpu_set(i, cpu_core_map[cpu]);
-                       cpu_set(cpu, cpu_core_map[i]);
+                       cpu_set(i, per_cpu(cpu_core_map, cpu));
+                       cpu_set(cpu, per_cpu(cpu_core_map, i));
                        if (cpu_data(cpu)->core_id == cpu_data(i)->core_id) {
-                               cpu_set(i, cpu_sibling_map[cpu]);
-                               cpu_set(cpu, cpu_sibling_map[i]);
+                               cpu_set(i, per_cpu(cpu_sibling_map, cpu));
+                               cpu_set(cpu, per_cpu(cpu_sibling_map, i));
                        }
                }
        }
@@ -835,8 +835,8 @@ __cpu_up (unsigned int cpu)
 
        if (cpu_data(cpu)->threads_per_core == 1 &&
            cpu_data(cpu)->cores_per_socket == 1) {
-               cpu_set(cpu, cpu_sibling_map[cpu]);
-               cpu_set(cpu, cpu_core_map[cpu]);
+               cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
+               cpu_set(cpu, per_cpu(cpu_core_map, cpu));
                return 0;
        }
 
index 37e9b9fefbf8c3ea621f939cb5df614e09ad39af..26cd19a96f30d53330196c4da440143f7998391a 100644 (file)
@@ -711,9 +711,9 @@ long arch_do_sysctl(xen_sysctl_t *op, XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl)
 
         memset(pi, 0, sizeof(*pi));
         pi->cpu_to_node = cpu_to_node_arr;
-        pi->threads_per_core = cpus_weight(cpu_sibling_map[0]);
+        pi->threads_per_core = cpus_weight(per_cpu(cpu_sibling_map, 0));
         pi->cores_per_socket =
-            cpus_weight(cpu_core_map[0]) / pi->threads_per_core;
+            cpus_weight(per_cpu(cpu_core_map, 0)) / pi->threads_per_core;
         pi->nr_cpus          = (u32)num_online_cpus();
         pi->nr_nodes         = num_online_nodes();
         pi->total_pages      = total_pages; 
index 589fdab4bf940886ed15163a763e2ae1088bd6a6..c3ea7a7074c18d23c3485519193ec6341f977366 100644 (file)
@@ -385,7 +385,7 @@ static unsigned int get_stagger(void)
 {
 #ifdef CONFIG_SMP
        int cpu = smp_processor_id();
-       return (cpu != first_cpu(cpu_sibling_map[cpu]));
+       return (cpu != first_cpu(per_cpu(cpu_sibling_map, cpu)));
 #endif 
        return 0;
 }
index 976fba89bd0f8f8bd01726c503882f3b57e4ecd7..5a18b9be1af10a0d9db8c2c319ab2ea3805a3d84 100644 (file)
@@ -70,12 +70,9 @@ int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
 int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
 
 /* representing HT siblings of each logical CPU */
-cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(cpu_sibling_map);
-
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_t, cpu_sibling_map);
 /* representing HT and core siblings of each logical CPU */
-cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(cpu_core_map);
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_t, cpu_core_map);
 
 /* bitmap of online cpus */
 cpumask_t cpu_online_map __read_mostly;
@@ -419,35 +416,35 @@ set_cpu_sibling_map(int cpu)
                for_each_cpu_mask(i, cpu_sibling_setup_map) {
                        if (phys_proc_id[cpu] == phys_proc_id[i] &&
                            cpu_core_id[cpu] == cpu_core_id[i]) {
-                               cpu_set(i, cpu_sibling_map[cpu]);
-                               cpu_set(cpu, cpu_sibling_map[i]);
-                               cpu_set(i, cpu_core_map[cpu]);
-                               cpu_set(cpu, cpu_core_map[i]);
+                               cpu_set(i, per_cpu(cpu_sibling_map, cpu));
+                               cpu_set(cpu, per_cpu(cpu_sibling_map, i));
+                               cpu_set(i, per_cpu(cpu_core_map, cpu));
+                               cpu_set(cpu, per_cpu(cpu_core_map, i));
                        }
                }
        } else {
-               cpu_set(cpu, cpu_sibling_map[cpu]);
+               cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
        }
 
        if (c[cpu].x86_max_cores == 1) {
-               cpu_core_map[cpu] = cpu_sibling_map[cpu];
+               per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu);
                c[cpu].booted_cores = 1;
                return;
        }
 
        for_each_cpu_mask(i, cpu_sibling_setup_map) {
                if (phys_proc_id[cpu] == phys_proc_id[i]) {
-                       cpu_set(i, cpu_core_map[cpu]);
-                       cpu_set(cpu, cpu_core_map[i]);
+                       cpu_set(i, per_cpu(cpu_core_map, cpu));
+                       cpu_set(cpu, per_cpu(cpu_core_map, i));
                        /*
                         *  Does this new cpu bringup a new core?
                         */
-                       if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
+                       if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) {
                                /*
                                 * for each core in package, increment
                                 * the booted_cores for this new cpu
                                 */
-                               if (first_cpu(cpu_sibling_map[i]) == i)
+                               if (first_cpu(per_cpu(cpu_sibling_map, i)) == i)
                                        c[cpu].booted_cores++;
                                /*
                                 * increment the core count for all
@@ -1052,8 +1049,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
                        printk(KERN_NOTICE "Local APIC not detected."
                                           " Using dummy APIC emulation.\n");
                map_cpu_to_logical_apicid();
-               cpu_set(0, cpu_sibling_map[0]);
-               cpu_set(0, cpu_core_map[0]);
+               cpu_set(0, per_cpu(cpu_sibling_map, 0));
+               cpu_set(0, per_cpu(cpu_core_map, 0));
                return;
        }
 
@@ -1163,16 +1160,16 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
        Dprintk("Boot done.\n");
 
        /*
-        * construct cpu_sibling_map[], so that we can tell sibling CPUs
+        * construct cpu_sibling_map, so that we can tell sibling CPUs
         * efficiently.
         */
        for (cpu = 0; cpu < NR_CPUS; cpu++) {
-               cpus_clear(cpu_sibling_map[cpu]);
-               cpus_clear(cpu_core_map[cpu]);
+               cpus_clear(per_cpu(cpu_sibling_map, cpu));
+               cpus_clear(per_cpu(cpu_core_map, cpu));
        }
 
-       cpu_set(0, cpu_sibling_map[0]);
-       cpu_set(0, cpu_core_map[0]);
+       cpu_set(0, per_cpu(cpu_sibling_map, 0));
+       cpu_set(0, per_cpu(cpu_core_map, 0));
 
        if (nmi_watchdog == NMI_LOCAL_APIC)
                check_nmi_watchdog();
@@ -1215,19 +1212,19 @@ remove_siblinginfo(int cpu)
        int sibling;
        struct cpuinfo_x86 *c = cpu_data;
 
-       for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
-               cpu_clear(cpu, cpu_core_map[sibling]);
+       for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) {
+               cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
                /*
                 * last thread sibling in this cpu core going down
                 */
-               if (cpus_weight(cpu_sibling_map[cpu]) == 1)
+               if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
                        c[sibling].booted_cores--;
        }
                        
-       for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
-               cpu_clear(cpu, cpu_sibling_map[sibling]);
-       cpus_clear(cpu_sibling_map[cpu]);
-       cpus_clear(cpu_core_map[cpu]);
+       for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu))
+               cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
+       cpus_clear(per_cpu(cpu_sibling_map, cpu));
+       cpus_clear(per_cpu(cpu_core_map, cpu));
        phys_proc_id[cpu] = BAD_APICID;
        cpu_core_id[cpu] = BAD_APICID;
        cpu_clear(cpu, cpu_sibling_setup_map);
index 651dbcfdd3522fb60e50f6357280303a7a7f93a6..27bda34e6e7def7abb6bbe5323c921fc544b65af 100644 (file)
@@ -60,9 +60,9 @@ long arch_do_sysctl(
         memset(pi, 0, sizeof(*pi));
         pi->cpu_to_node = cpu_to_node_arr;
         pi->threads_per_core =
-            cpus_weight(cpu_sibling_map[0]);
+            cpus_weight(per_cpu(cpu_sibling_map, 0));
         pi->cores_per_socket =
-            cpus_weight(cpu_core_map[0]) / pi->threads_per_core;
+            cpus_weight(per_cpu(cpu_core_map, 0)) / pi->threads_per_core;
         pi->nr_cpus = (u32)num_online_cpus();
         pi->nr_nodes = num_online_nodes();
         pi->total_pages = total_pages;
index 81a7efe213d9a5a068e6858566a5cf85bd0899dc..e21d56593753519f423d444deba75c8593a03e51 100644 (file)
@@ -167,18 +167,18 @@ static unsigned int default_vcpu0_location(void)
      * If we're on a HT system, we only auto-allocate to a non-primary HT. We 
      * favour high numbered CPUs in the event of a tie.
      */
-    cpu = first_cpu(cpu_sibling_map[0]);
-    if ( cpus_weight(cpu_sibling_map[0]) > 1 )
-        cpu = next_cpu(cpu, cpu_sibling_map[0]);
-    cpu_exclude_map = cpu_sibling_map[0];
+    cpu = first_cpu(per_cpu(cpu_sibling_map, 0));
+    if ( cpus_weight(per_cpu(cpu_sibling_map, 0)) > 1 )
+        cpu = next_cpu(cpu, per_cpu(cpu_sibling_map, 0));
+    cpu_exclude_map = per_cpu(cpu_sibling_map, 0);
     for_each_online_cpu ( i )
     {
         if ( cpu_isset(i, cpu_exclude_map) )
             continue;
-        if ( (i == first_cpu(cpu_sibling_map[i])) &&
-             (cpus_weight(cpu_sibling_map[i]) > 1) )
+        if ( (i == first_cpu(per_cpu(cpu_sibling_map, i))) &&
+             (cpus_weight(per_cpu(cpu_sibling_map, i)) > 1) )
             continue;
-        cpus_or(cpu_exclude_map, cpu_exclude_map, cpu_sibling_map[i]);
+        cpus_or(cpu_exclude_map, cpu_exclude_map, per_cpu(cpu_sibling_map, i));
         if ( !cnt || cnt[i] <= cnt[cpu] )
             cpu = i;
     }
index d0d31d9fff17ba06d79761421f3a6ae7c3be8672..c4e056b35b333d80950ea072921b0ea316d1b70a 100644 (file)
@@ -402,17 +402,17 @@ csched_cpu_pick(struct vcpu *vc)
 
         nxt = cycle_cpu(cpu, cpus);
 
-        if ( cpu_isset(cpu, cpu_core_map[nxt]) )
+        if ( cpu_isset(cpu, per_cpu(cpu_core_map, nxt)) )
         {
-            ASSERT( cpu_isset(nxt, cpu_core_map[cpu]) );
-            cpus_and(cpu_idlers, idlers, cpu_sibling_map[cpu]);
-            cpus_and(nxt_idlers, idlers, cpu_sibling_map[nxt]);
+            ASSERT( cpu_isset(nxt, per_cpu(cpu_core_map, cpu)) );
+            cpus_and(cpu_idlers, idlers, per_cpu(cpu_sibling_map, cpu));
+            cpus_and(nxt_idlers, idlers, per_cpu(cpu_sibling_map, nxt));
         }
         else
         {
-            ASSERT( !cpu_isset(nxt, cpu_core_map[cpu]) );
-            cpus_and(cpu_idlers, idlers, cpu_core_map[cpu]);
-            cpus_and(nxt_idlers, idlers, cpu_core_map[nxt]);
+            ASSERT( !cpu_isset(nxt, per_cpu(cpu_core_map, cpu)) );
+            cpus_and(cpu_idlers, idlers, per_cpu(cpu_core_map, cpu));
+            cpus_and(nxt_idlers, idlers, per_cpu(cpu_core_map, nxt));
         }
 
         weight_cpu = cpus_weight(cpu_idlers);
@@ -1205,9 +1205,9 @@ csched_dump_pcpu(int cpu)
     spc = CSCHED_PCPU(cpu);
     runq = &spc->runq;
 
-    cpumask_scnprintf(cpustr, sizeof(cpustr), cpu_sibling_map[cpu]);
+    cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_sibling_map, cpu));
     printk(" sort=%d, sibling=%s, ", spc->runq_sort_last, cpustr);
-    cpumask_scnprintf(cpustr, sizeof(cpustr), cpu_core_map[cpu]);
+    cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_core_map, cpu));
     printk("core=%s\n", cpustr);
 
     /* current VCPU */
index 2831c0460c597e1a16c2b3c2d96e28ae878c12fc..7380c773cf196fe17bb2c955bf3134e7feb0a9cb 100644 (file)
@@ -60,8 +60,8 @@ extern struct smp_boot_data {
 extern char no_int_routing __devinitdata;
 
 extern cpumask_t cpu_online_map;
-extern cpumask_t cpu_core_map[NR_CPUS];
-extern cpumask_t cpu_sibling_map[NR_CPUS];
+DECLARE_PER_CPU(cpumask_t, cpu_core_map);
+DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
 extern int smp_num_siblings;
 extern int smp_num_cpucores;
 extern void __iomem *ipi_base_addr;
index 6d566d1a05116b02732d24d782716b957eb9f5c4..3bd3d6caeb235177ae701ef505f1098378d0b4fd 100644 (file)
@@ -32,8 +32,8 @@
  
 extern void smp_alloc_memory(void);
 extern int pic_mode;
-extern cpumask_t cpu_sibling_map[];
-extern cpumask_t cpu_core_map[];
+DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
+DECLARE_PER_CPU(cpumask_t, cpu_core_map);
 
 void smp_send_nmi_allbutself(void);